library(tidyverse)

# Wave 1 export: keep the first row seen for each participantId, and only rows
# with Progress == 100.
# NOTE(review): duplicates are identified on the full export, so a participant
# whose first row is incomplete is dropped even if a later row is complete --
# confirm this is intended.
w1 <- "data/study2_w1.csv" %>%
  qualtRics::read_survey() %>%
  filter(!duplicated(participantId), Progress == 100)

# Wave 2 export, cleaned with the same two rules.
w2 <- "data/study2_w2.csv" %>%
  qualtRics::read_survey() %>%
  filter(!duplicated(participantId), Progress == 100)

# Keep participants present in both waves. Columns that exist in both waves
# receive dplyr's default ".x" / ".y" suffixes (e.g. likert.x).
study2 <- inner_join(w1, w2, by = "participantId")

# Build one cleaned text string per participant from every column whose name
# contains "acc": the columns are pasted together with single spaces, then
# reduced to letters and spaces and trimmed at both ends.
text_combined <- study2 %>%
  transmute(text = paste(!!!select(., contains("acc")))) %>%
  mutate(
    # paste() renders missing cells as the standalone token "NA". Remove only
    # that whole token; stripping every "NA" substring would also damage real
    # words such as "NATO" or "DNA".
    text = gsub("\\bNA\\b", "", text),
    # Drop everything except ASCII letters and spaces.
    text = gsub("[^A-Za-z ]", "", text),
    text = stringr::str_trim(text)
  ) %>%
  pull(text)

# Analysis data set for the "strong" condition: one row per joined participant,
# with survey columns renamed to analysis names and a few derived variables.
strong <- study2 %>%
  transmute(
    # seq_len() stays valid (empty) when study2 has zero rows; 1:nrow() would
    # produce c(1, 0).
    id = seq_len(nrow(study2)),
    likert = likert.x,
    # Recode the information condition labels: 4p -> pro, 2p2c -> mixed,
    # 4c -> con.
    information = plyr::mapvalues(con,
                                  c("4p", "2p2c", "4c"),
                                  c("pro", "mixed", "con")),
    # "accuracy" is the reference level of the motivation factor.
    motivation = relevel(as.factor(condition_strong), "accuracy"),
    pre_attitudes = pre_measure_strong_1,
    pre_certainty = pre_certainty_strong_19,
    pre_duration = Q52,
    pre_durability = Q53,
    pre_certain1 = Q51_1,
    pre_certain2 = Q51_2,
    pre_certain3 = Q51_3,
    pre_certain4 = Q51_4,
    pre_certain5 = Q51_5,
    pre_certain6 = Q51_6,
    pre_certain7 = Q51_7,
    topic = likert_topic,
    accuracy_con1 = rate_acc_stro_con1,
    accuracy_con2 = rate_acc_stro_con2,
    accuracy_con3 = rate_acc_stro_con3,
    accuracy_pro1 = rate_acc_stro_pro1,
    accuracy_pro2 = rate_acc_stro_pro2,
    accuracy_pro3 = rate_acc_stro_pro3,
    strength_con1 = rate_strength_stro_con1,
    strength_con2 = rate_strength_stro_con2,
    strength_con3 = rate_strength_stro_con3,
    strength_pro1 = rate_strength_stro_pro1,
    strength_pro2 = rate_strength_stro_pro2,
    strength_pro3 = rate_strength_stro_pro3,
    post_attitudes = post_outcome_1,
    post_certainty = post_certainty_19,
    post_certain1 = post_certain_multi_S_1,
    post_certain2 = post_certain_multi_S_2,
    post_certain3 = post_certain_multi_S_3,
    post_certain4 = post_certain_multi_S_4,
    post_certain5 = post_certain_multi_S_5,
    post_certain6 = post_certain_multi_S_6,
    post_certain7 = post_certain_multi_S_7,
    # polsoph items become 0/1 indicators: items 1-2 compare against one
    # response value, items 3-5 search the free-text answer for keywords.
    polsoph1 = as.numeric(polsoph1 == 2),
    polsoph2 = as.numeric(polsoph2 == 1),
    pid3, birthyr,
    ideology,
    polsoph3 = as.numeric(grepl("2/3|thirds|67", polsoph3, ignore.case = TRUE)),
    polsoph4 = as.numeric(grepl("court|judicial|scotus", polsoph4, ignore.case = TRUE)),
    polsoph5 = as.numeric(grepl("vice|vp", polsoph5, ignore.case = TRUE)),
    # Log of the page-submit timing column.
    timer = log(`time_thought_str_Page Submit`),
    # 1 when the original generic_flag is missing, 0 otherwise.
    generic_flag = as.numeric(is.na(generic_flag)),
    # Tertile factors of the pre-treatment measures defined above.
    pre_attitudes_tertiles = factor(ntile(pre_attitudes, 3), 1:3, c("Tertile 1", "Tertile 2", "Tertile 3")),
    pre_certainty_tertiles = factor(ntile(pre_certainty, 3), 1:3, c("Tertile 1", "Tertile 2", "Tertile 3")),
    text = text_combined,
    condition = "strong"
  )



# Subtopic keyword patterns ----
# Each tag_* is a regular expression (alternatives separated by "|") that is
# matched against lower-cased text. The trailing number is the Policy Agendas
# Project (PAP) codebook code the tag is associated with.
tag_generalmacro <- "econom" # 100
tag_econinequality <- "rich|poor|middle class|pover|disparity|inequal|economic divide|wage|in need" # 100
tag_inflation <- "inflat|interest rat" # 101
tag_unemployment <- "employ" # 103
tag_natbudgetdebt <- "spending|debt|fiscal policy" # 105
tag_tax <- "tax" # 107
tag_pricecontrol <- "price" # 110

tag_equality <- "equal" # 200 -- equal rights in general
tag_racism <- "race|racism|discriminat" # 201
# "lgbt" was previously misspelled "lqbt", so it could never match.
tag_gendersex <- "trans|bathroom|gay|lgbt|same-sex|same sex|marry" # 202
tag_abortion <- "abortion|woman|women|reproduc|pro-choice|pro-life|roe|wade" # 202
tag_votingrights <- "voting right|register|registration|opportunity to vote|right to vote" # 206
tag_speech <- "speech|express" # 207 non-governmental issues included
tag_firstamend <- "associat|assembl|church and state" # 207
tag_reparations <- "reparation" # 299

tag_healthcare <- "healthcare|health care|medical|aca|medicare" # 301
tag_healthinsurance <- "health insur" # 302
tag_covid <- "covid|pandemic|mask" # 331
tag_vaccine <- "vaccin" # 331
tag_prenatal <- "childcare|pregnancy" # 332
tag_mentalhealth <- "mentally|mental health" # 333

tag_farmsubsidies <- "farm subsid|farmer" # 402

tag_education <- "education|student|school" # 600
tag_highered <- "higher ed|debt|loan" # 601

tag_environment <- "environ|sustainab|green" # 700
tag_climate <- "climate|warming|air quality" # 705

tag_energy <- "energ" # 800
tag_nuclear <- "nuclear" # 801
# Requires leading whitespace so that e.g. "soil" does not match "oil".
tag_gasoil <- "\\soil|\\sgas\\s" # 803

tag_immigration <- "immigrat|migrant|border|refuge" # 900

tag_transportation <- "transport" # 1000
tag_masstransport <- "public trans|buses" # 1001
tag_infrastructure <- "infrastructure|public work" # 1010

tag_lawcrime <- "law|legal|crime" # 1200
tag_marijuanalegal <- "marijuana|cannabis" # 1203
tag_court <- "court|supreme|jury|bail|juris|convict" # 1204
tag_criminaljustice <- "prison|criminal|justice|felony" # 1205
tag_police <- "police|law enforc" # 1209
tag_gun <- "gun|bear arms|2nd|second|shoot|weapons" # 1209
tag_criminalcode <- "death penalty" # 1210

tag_socialwelfare <- "welfare|safety net" # 1300
tag_schoollunch <- "school lunch" # 1301
tag_UBI <- "universal basic" # 1302
tag_elderlyassistance <- "retirement|social security" # 1303

tag_housing <- "housing" # 1401
tag_affordhous <- "affordable hous|low-income hous" # 1406
tag_homeless <- "homeless" # 1409

tag_antitrust <- "monopol" # 1520
tag_business <- "small busi|local busi" # 1521
tag_consumerprivacy <- "consumer privacy" # 1525

tag_alliances <- "alliance" # 1602
tag_military <- "military|defense budget" # 1604
tag_militaryaid <- "foreign conflict" # 1606
tag_veterans <- "veteran" # 1609
tag_war <- "war|killing innocent" # 1619

tag_netneutrality <- "net neutrality" # 1709

tag_foreigntrade <- "tariff" # 1800
tag_offshoring <- "bring all the jobs back|off-shoring" # 1804

tag_foreignaid <- "foreign aid|isolationism|help other countries" # 1901

tag_institutions <- "political system|government|term limit" # 2011
tag_partypolitics <- "party|third party|political parties" # 2011
tag_govtethics <- "rig|cheat|corrupt|lobbying|congress|senator" # 2012
tag_elections <- "election|vote|voting|campaign|out of politics|influence politic" # 2012

tag_religion <- "religion|god" # 31

# Not PAP categories; these feed the "(Other)" labels.
tag_polarization <- "divided|polariz" # 99
tag_democracy <- "democracy" # 99


# Major topics (see PAP codebook) ----
# Each major_* is a "|"-separated alternation of the subtopic codes that belong
# to that major topic.
major_macroecon <- paste(c("100", "101", "103", "105", "107", "110"), collapse = "|")
major_civilrights <- paste(c("200", "201", "202", "206", "207", "299"), collapse = "|")
major_health <- paste(c("301", "302", "331", "332", "333"), collapse = "|")
major_agriculture <- "402"
major_laboremploy <- "" # no subtopic codes listed for this major topic
major_education <- paste(c("600", "601"), collapse = "|")
major_environment <- paste(c("700", "705"), collapse = "|")
major_energy <- paste(c("800", "801", "803"), collapse = "|")
major_immigration <- "900"
major_transportation <- paste(c("1000", "1001", "1010"), collapse = "|")
major_law <- paste(c("1200", "1203", "1204", "1205", "1209", "1210"), collapse = "|")
major_welfare <- paste(c("1300", "1301", "1302", "1303"), collapse = "|")
major_housing <- paste(c("1401", "1406", "1409"), collapse = "|")
major_business <- paste(c("1520", "1521", "1525"), collapse = "|")
major_defense <- paste(c("1602", "1604", "1606", "1609", "1619"), collapse = "|")
major_technology <- "1709"
major_foreigntrade <- paste(c("1800", "1804"), collapse = "|")
major_intlaffairs <- "1901"
major_govtop <- paste(c("2011", "2012"), collapse = "|")
major_religion <- "31"


# Apply codings to study 2 ----
# Subtopics are assigned first from the gpt-generated topic and, where that
# yields no substantive code, from the topic and likert item combined.

# Assign a subtopic label to each element of `x` using the tag_* patterns.
#
# Rules are evaluated top to bottom and the first match wins, so the order
# below is part of the coding scheme. case_when() is used instead of nested
# if_else() calls, which were deep enough to overflow the parser's context
# stack.
#
# x:    character vector (lower-cased) that the tag patterns are matched on.
# text: character vector of the same length; used only by the nuclear-energy
#       rule, which always checks the combined text for "war".
# Returns a character vector of labels; "Other" when no rule matches.
code_subtopic <- function(x, text) {
  case_when(
    str_detect(x, tag_inflation) ~
      "101: Inflation and Interest Rates",
    str_detect(x, tag_unemployment) ~
      "103: Unemployment Rate",
    str_detect(x, tag_natbudgetdebt) & !str_detect(x, "student|education") ~
      "105: National Budget and Debt",
    str_detect(x, tag_tax) & !str_detect(x, "student") ~
      "107: Taxation, Tax policy, and Broad Tax Reform",
    str_detect(x, tag_pricecontrol) ~
      "110: Price Control and Stabilization",
    str_detect(x, tag_econinequality) & !str_detect(x, "politician") ~
      "100: Economic Inequality",
    str_detect(x, tag_generalmacro) & !str_detect(x, "education|student") ~
      "100: General Domestic Macroeconomic Issues",
    str_detect(x, tag_racism) ~
      "201: Ethnic Minority and Racial Group Discrimination",
    str_detect(x, tag_gendersex) & !str_detect(x, "transport") ~
      "202: Gender, Identity, and Sexual Orientation Discrimination",
    str_detect(x, tag_abortion) & !str_detect(x, "trans") ~
      "202: Abortion",
    str_detect(x, tag_votingrights) ~
      "206: Voting Rights, Participation, and Related Issues",
    str_detect(x, tag_speech) ~
      "207: Speech Issues",
    str_detect(x, tag_firstamend) ~
      "207: First Amendment (excl. Speech Issues)",
    str_detect(x, tag_reparations) ~
      "299: Reparations",
    # Label text kept as-is ("303"); the major-topic step maps it by its
    # leading digit, so it is still grouped under Health.
    str_detect(x, tag_mentalhealth) & !str_detect(x, "gun control") ~
      "303: Mental Health",
    str_detect(x, tag_healthinsurance) ~
      "302: Health Insurance",
    str_detect(x, tag_covid) ~
      "331: Public Health (COVID-19)",
    str_detect(x, tag_vaccine) ~
      "331: Public Health (Vaccines)",
    str_detect(x, tag_prenatal) & !str_detect(x, "abortion") ~
      "332: Children and Prenatal Care",
    str_detect(x, tag_healthcare) & !str_detect(x, "marijuana") ~
      "301: Comprehensive Health Care Reform",
    str_detect(x, tag_farmsubsidies) ~
      "402: Govt Subsidies to Farmers and Ranchers",
    str_detect(x, tag_highered) ~
      "601: Higher Education (incl. Student Debt)",
    str_detect(x, tag_education) & !str_detect(x, "lunch") ~
      "600: Education, General",
    str_detect(x, tag_climate) ~
      "705: Climate Change and Pollution",
    str_detect(x, tag_environment) ~
      "700: Environment, General",
    str_detect(x, tag_nuclear) & !str_detect(text, "war") ~
      "801: Nuclear Energy",
    str_detect(x, tag_gasoil) ~
      "803: Natural Gas and Oil",
    str_detect(x, tag_energy) ~
      "800: Energy, General",
    str_detect(x, tag_immigration) ~
      "900: Immigration",
    str_detect(x, tag_masstransport) ~
      "1001: Mass Transportation and Safety",
    str_detect(x, tag_infrastructure) ~
      "1010: Infrastructure",
    str_detect(x, tag_transportation) ~
      "1000: Transporation, General",
    str_detect(x, tag_marijuanalegal) ~
      "1203: Marijuana Legalization",
    str_detect(x, tag_court) ~
      "1204: Court Administration, Reform",
    str_detect(x, tag_criminaljustice) ~
      "1205: Prisons",
    str_detect(x, tag_police) ~
      "1209: Police",
    str_detect(x, tag_gun) ~
      "1209: Gun Control",
    str_detect(x, tag_criminalcode) ~
      "1210: Criminal and Civil Code (incl. Death Penalty)",
    str_detect(x, tag_lawcrime) ~
      "1200: Law, Crime, and Family Issues, General",
    str_detect(x, tag_socialwelfare) ~
      "1300: Social Welfare, General",
    str_detect(x, tag_schoollunch) ~
      "1301: Nutrition Assistance (incl. School Lunch)",
    str_detect(x, tag_UBI) ~
      "1302: Cash Assistance (incl. UBI)",
    str_detect(x, tag_elderlyassistance) ~
      "1303: Elderly Issues and Assistance",
    str_detect(x, tag_homeless) ~
      "1409: Homelessness",
    str_detect(x, tag_affordhous) ~
      "1406: Low and Middle-Income Housing",
    str_detect(x, tag_housing) ~
      "1401: Housing and Community Development",
    str_detect(x, tag_antitrust) ~
      "1520: Anti-Trust, Corporate Governance",
    str_detect(x, tag_business) ~
      "1521: Small Business Issues",
    str_detect(x, tag_consumerprivacy) ~
      "1525: Consumer Privacy",
    str_detect(x, tag_alliances) ~
      "1602: Defense Alliances",
    str_detect(x, tag_military) ~
      "1604: Military Readiness, Spending",
    str_detect(x, tag_militaryaid) ~
      "1606: Military Aid",
    str_detect(x, tag_veterans) ~
      "1609: Veteran Affairs",
    str_detect(x, tag_war) ~
      "1619: Direct War Related Issues",
    str_detect(x, tag_netneutrality) ~
      "1709: Net Neutrality",
    str_detect(x, tag_foreigntrade) ~
      "1800: Foreign Trade, General",
    str_detect(x, tag_offshoring) ~
      "1804: Off-Shoring Jobs",
    str_detect(x, tag_foreignaid) ~
      "1901: Foreign Aid",
    str_detect(x, tag_partypolitics) ~
      "2011: National Party Politics",
    str_detect(x, tag_govtethics) ~
      "2012: Government Ethics, Corruption",
    str_detect(x, tag_elections) ~
      "2012: Election and Campaign Regulation",
    str_detect(x, tag_institutions) ~
      "2011: Federal Institutions",
    str_detect(x, tag_religion) ~
      "3100: Church and Religion",
    str_detect(x, tag_polarization) ~
      "Polarization (Other)",
    str_detect(x, tag_democracy) ~
      "Democracy (Other)",
    str_detect(x, tag_equality) ~
      "200: Civil Rights, General",
    TRUE ~ "Other"
  )
}

# Map subtopic labels to major-topic names.
#
# The major topic is taken from the label's leading numeric code with its last
# two digits dropped (e.g. "1209: ..." -> 12, "303: ..." -> 3,
# "3100: ..." -> 31). Searching the label for a code as a bare substring is not
# safe: "1001" contains "100", "2011" contains "201", "1301" contains "301",
# and so on, which sends those subtopics to the wrong major topic.
#
# subtopic: character vector of labels as produced by code_subtopic().
# Returns a character vector; labels without a leading code, and codes with no
# entry in the table below, become "Other".
# NOTE(review): 17xx (technology) and 18xx (foreign trade) have no entry, as in
# the previous mapping, so they are reported as "Other" -- confirm.
code_majortopic <- function(subtopic) {
  major_labels <- c(
    "1" = "Macroeconomics",
    "2" = "Civil Rights, Minority Issues, and Civil Liberties",
    "3" = "Health",
    "4" = "Agriculture",
    "6" = "Education",
    "7" = "Environment",
    "8" = "Energy",
    "9" = "Immigration",
    "10" = "Transportation",
    "12" = "Law, Crime, and Family Issues",
    "13" = "Social Welfare",
    "14" = "Community Development and Housing Issues",
    "15" = "Domestic Commerce",
    "16" = "Defense",
    "19" = "Int'l Affairs and Foreign Aid",
    "20" = "Government Operations",
    "31" = "Church and Religion"
  )
  major_code <- as.integer(str_extract(subtopic, "^\\d+")) %/% 100L
  # Unknown or missing codes index to NA and fall back to "Other".
  coalesce(unname(major_labels[as.character(major_code)]), "Other")
}

coded_s2 <- strong %>%
  select(id, generic_flag, likert, topic) %>%
  mutate(
    # Normalise the generated topic: lower-case, strip the first boilerplate
    # prefix and the first period, drop newlines. A missing topic becomes the
    # literal string "na".
    topic = topic %>%
      str_to_lower() %>%
      str_replace("topic: |the topic of the following statement is ", "") %>%
      str_replace("\\.", "") %>%
      str_replace_all("\n", "") %>%
      str_replace_na() %>%
      str_to_lower(),
    likert = str_to_lower(str_replace_na(likert)),
    # topic and likert are concatenated without a separator.
    text = str_c(topic, likert),
    # First pass: code from the topic alone.
    subtopic = code_subtopic(topic, text),
    # Second pass: rows whose label contains "Other" (including the
    # Polarization / Democracy "(Other)" labels) are re-coded from the
    # combined text.
    subtopic = if_else(str_detect(subtopic, "Other"),
                       code_subtopic(text, text),
                       subtopic),
    majortopic = code_majortopic(subtopic)
  )


# Prepare analysis of Appendix B1 ----

# One row per coded response, tagged with its experiment and with that
# experiment's total number of rows (`size`).
s1 <- read_csv("data/study1_handcoded.csv") %>%
  mutate(study = "Experiment 1", size = n()) %>%
  select(subtopic, study, size)

s2 <- coded_s2 %>%
  mutate(study = "Experiment 2", size = n()) %>%
  select(subtopic, study, size)

s3 <- read_csv("data/study3_handcoded.csv") %>%
  mutate(study = "Experiment 3", size = n()) %>%
  select(subtopic, study, size)

# Stack the three experiments, then append a second copy of every row labelled
# "Total" whose size is the pooled row count.
full <- rbind(s1, s2, s3)
pooled <- mutate(full, study = "Total", size = nrow(full))
full <- rbind(full, pooled)
rm(pooled)

# Count and share of each subtopic within each study (and within the total).
# The result stays grouped by subtopic and study, as summarize() does by
# default when it drops the last grouping variable.
stats_subtopic <- full %>%
  group_by(subtopic, study, size) %>%
  summarize(count = n(), .groups = "drop_last") %>%
  mutate(
    prop = count / size,
    chart = ifelse(study == "Total", "Combined", "By Experiment")
  )
